# Details:
# - This runs a GSEA analysis using ONLY the MSigDB Hallmark pathways.
# - As far as I could find, this is not done with clusterProfiler out of the box.
# - clusterProfiler uses the biological process, molecular function, and cellular component options.
# - Generally speaking, what we want appears to be more niche than the base GSEA options.
# - So the next option is to find something that incorporates the MSigDB Hallmark option.
# - What I have found is the CRAN package msigdbr; I am going to attempt that and see what happens.
# - I have checked whether msigdbr can be combined with clusterProfiler, and there is a sketchily documented way to do it.
##CRAN Project Vignette: https://cran.r-project.org/web/packages/msigdbr/vignettes/msigdbr-intro.html
# Reference (Biomedical Knowledge Mining book): https://yulab-smu.top/biomedical-knowledge-mining-book/semantic-similarity-overview.html
# Perks of the msigdbr package:
# - All of the MSigDB options are present (that I know of), AND for multiple species.
# - It is ridiculously basic to use.
# - Its output format is easy to use with tibble functions, without extra data wrangling.
# Set up (from the vignette)
# GSEA run, with settings carried over from the first go-round.
# gene_list supplies the pre-ranked statistics and m_t2g the term-to-gene
# mapping (presumably the MSigDB Hallmark table built with msigdbr -- confirm
# against where m_t2g is created). P-values are deliberately left unadjusted.
gse <- GSEA(
  geneList = gene_list,
  TERM2GENE = m_t2g,
  minGSSize = 15,
  maxGSSize = 500,
  nPermSimple = 1000,
  pvalueCutoff = 0.05,
  pAdjustMethod = "none",
  verbose = TRUE
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (8.24% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
# The minimum gene-set size, maximum gene-set size, and number of permutations
# in the GSEA() call above are set to match the GSEA software:
# min size 15, max size 500, 1000 permutations, and no p-value adjustment method.
# Attach DOSE with library() so that a missing package stops the script with an
# error; require() would only return FALSE with a warning and let the script
# carry on to fail later.
library(DOSE)
## Loading required package: DOSE
## DOSE v3.20.1 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
# Dot plots of the GSEA result, split and faceted by `.sign`, with the y-axis
# text shrunk to size 4: first showing 10 categories, then 20.
# DOSE is already attached earlier in the script, so the repeated
# require(DOSE) that sat between the two plots has been dropped.
dotplot(gse, showCategory = 10, split = ".sign") +
  facet_grid(. ~ .sign) +
  theme(axis.text.y = element_text(size = 4))
dotplot(gse, showCategory = 20, split = ".sign") +
  facet_grid(. ~ .sign) +
  theme(axis.text.y = element_text(size = 4))
# Attach a pairwise term-similarity matrix to the GSEA result (method "JC",
# no semantic data, up to 200 categories), then draw the enrichment map for
# 15 categories. The result is written back to `gse` so later plots see it.
gse <- pairwise_termsim(
  gse,
  method = "JC",
  semData = NULL,
  showCategory = 200
)
emapplot(gse, showCategory = 15)
# Category-gene network plots labelling only the category nodes, once for
# 10 categories and once for 20.
# categorySize can be either 'pvalue' or 'geneNum'.
cnetplot(
  gse,
  categorySize = "pvalue",
  foldChange = gene_list,
  showCategory = 10,
  node_label = "category",
  cex_label_category = 0.5
)
cnetplot(
  gse,
  categorySize = "pvalue",
  foldChange = gene_list,
  showCategory = 20,
  node_label = "category",
  cex_label_category = 0.5
)
# Ridge plot of the GSEA result with a relabelled x-axis and y-axis text
# shrunk to size 5.
ridgeplot(gse) +
  labs(x = "enrichment distribution") +
  theme(axis.text.y = element_text(size = 5))
## Picking joint bandwidth of 0.206
# Draw a GSEA plot for each of the first 16 gene sets in `gse`.
# The gene set's index is used both to look up the plot title
# (gse$Description[i]) and as the value for geneSetID.
#
# Changes from the original sixteen copy-pasted calls:
# - the first call passed `y = "all"`, which is not a gseaplot() argument;
#   every plot now passes `by = "all"` like the other fifteen calls did;
# - the loop is capped at the number of gene sets actually present, so a
#   result with fewer than 16 entries no longer asks for a nonexistent set.
max_gsea_plots <- 16L
gsea_plot_ids <- seq_len(min(max_gsea_plots, length(gse$Description)))
for (i in gsea_plot_ids) {
  gsea_plot <- gseaplot(
    gse,
    geneSetID = i,
    by = "all",
    title = gse$Description[i]
  )
  # Keep the individual P1, P2, ... objects that the original code created,
  # in case they are used further down the script.
  assign(paste0("P", i), gsea_plot)
  # Auto-printing does not happen inside a loop, so print explicitly.
  print(gsea_plot)
}